library(dplyr)
library(jsonlite)
library(urltools)
library(reticulate)
library(DT)
library(knitr)
library(networkD3)
library(clusterProfiler)
library(ReactomePA)
library(org.Hs.eg.db)
library(visNetwork)
library(tidyverse)
my_compound <- "N#Cc1ccccc1Cn3c(N2CCCCC2)cc(=O)[nH]c3=O"
baseur_sim <- "https://www.ebi.ac.uk/chembl/api/data/similarity/"
simil_cut_off = "/85"
pba <- fromJSON(paste0(baseur_sim, url_encode(my_compound), simil_cut_off, ".json?limit=50"), flatten=TRUE)
table_comp_match <- pba$molecules[,c(15,23,55,33,21,60)]
colnames(table_comp_match) <- c("ChEMBL_ID","similarity",
"Ro5_violations","withdrawn_info","name","canonical_smiles")
table_comp_match$name <- ifelse(is.na(table_comp_match$name), table_comp_match$ChEMBL_ID, table_comp_match$name)
#This code is aimed to generate the similarity matrix by using RDkit
res_df = pd.DataFrame(r.table_comp_match)
list_smiles1=list(res_df.canonical_smiles)
list_smiles=[Chem.MolFromSmiles(x) for x in list_smiles1]
list_smiles.append(my_mol)
list_ids=list(res_df.name)
list_ids.append("My_mol")
my_fps = [FingerprintMols.FingerprintMol(x) for x in list_smiles]
dists = []
simil = []
nfps = len(my_fps)
for j in range(0,nfps):
simil.append(DataStructs.BulkTanimotoSimilarity(my_fps[j],my_fps))
res_dis = DataStructs.BulkTanimotoSimilarity(my_fps[j],my_fps,returnDistance=1)
dists.append([1-x for x in res_dis])
simil_mat = np.array(simil)
dist_mat = np.array(dists)
df_dist = pd.DataFrame(dist_mat)
df_simil = pd.DataFrame(simil_mat)
df_simil.columns = list_ids
df_simil.index = list_ids
library(heatmaply)
d1 <- dist(py$df_simil, method = "manhattan", diag = FALSE, upper = TRUE)
d2 <- dist(t(py$df_simil), method = "manhattan", diag = FALSE, upper = TRUE)
c1 <- hclust(d1, method = "ward.D2", members = NULL)
c2 <- hclust(d2, method = "ward.D2", members = NULL)
heatmaply(as.matrix(py$df_simil), margins = c(40, 40, 0, 0),
Rowv=as.dendrogram(c2), Colv=as.dendrogram(c1),
colors = BrBG
)
# Are the compounds sharing a common sub-structure?
from rdkit.Chem import rdFMCS
from collections import namedtuple
from cairosvg import svg2png
mhs = list_smiles
[mhs[j].SetProp('_Name', str(list_ids[j])) for j in range(0, len(mhs))];
mcs = rdFMCS.FindMCS(mhs,threshold=0.8,completeRingsOnly=True,ringMatchesRingOnly=True)
patt = Chem.MolFromSmarts(mcs.smartsString)
AllChem.Compute2DCoords(patt)
matchingMols = [x for x in mhs if x.HasSubstructMatch(patt)]
legends=[x.GetProp("_Name") for x in matchingMols]
for m in matchingMols: AllChem.GenerateDepictionMatching2DStructure(m,patt)
##code for visualizing the sub-structure
hats = []
hbnds = []
for mm in matchingMols:
ats = mm.GetSubstructMatch(patt)
hats.append(ats)
bnds = []
for bnd in mm.GetBonds():
if bnd.GetBeginAtomIdx() in ats and bnd.GetEndAtomIdx() in ats:
bnds.append(bnd.GetIdx())
hbnds.append(bnds)
molsPerRow = 3
nRows = len(matchingMols)//molsPerRow
if len(matchingMols)%molsPerRow:
nRows+=1
panelx = 250
panely = 260
canvasx = panelx * molsPerRow
canvasy = panely * nRows
drawer = rdMolDraw2D.MolDraw2DSVG(canvasx,canvasy,panelx,panely)
drawer.DrawMolecules(matchingMols,highlightAtoms=hats,highlightBonds=hbnds, legends=legends)
drawer.FinishDrawing()
svg = drawer.GetDrawingText()
SVG(svg.replace("svg:",""))
svg2png(bytestring=svg,write_to='molecule2.png')
Result of the sub-structure search
comp_indi <- unique(table_comp_match$name)
df_indi <- dplyr::filter(meta_data_indications_final, name %in% comp_indi)
if(dim(df_indi)[1] == 0) {
print("No mechanism of action avaliable for these compounds!")
} else {
df_indi$efo_id <- gsub(':','_', df_indi$efo_id)
df_indi %>%
dplyr::mutate(target_chembl_id = paste0("<a target='_blank' href='https://www.ebi.ac.uk/chembl/beta/target_report_card/",target_chembl_id,"'>",target_chembl_id,"</a>")) %>%
dplyr::mutate(efo_id = paste0("<a target='_blank' href='https://www.ebi.ac.uk/efo/",url_encode(efo_id),"'>",efo_id,"</a>")) %>%
datatable(escape = F, rownames = F, options = list(autoWidth = T,scrollX = "200px"),caption = "MOA information in ChEMBL")
}
# Extracting target bioactivities from Binding assays
baseur_act <- "https://www.ebi.ac.uk/chembl/api/data/activity.json?molecule_chembl_id="
len_ids <- length(unique(table_comp_match$ChEMBL_ID))
pba2 <- lapply(table_comp_match$ChEMBL_ID, function(x){
fromJSON(paste0(baseur_act, x, "&assay_type=B&standard_units=nM","&limit=1000"),flatten=TRUE)
})
m <- lapply(1:len_ids, function(x) pba2[[x]]$activities)
m <- do.call(rbind, m)
m <- m[!is.na(m$pchembl_value),]
m <- m[m$potential_duplicate == "FALSE",]
m$molecule_pref_name <- ifelse(is.na(m$molecule_pref_name), m$molecule_chembl_id, m$molecule_pref_name)
target_bioacti <- m[,c(35,37,16,34,32,31,18,36,12)]
target_bioacti_final <- merge(target_ids[,c(1,3)], target_bioacti, by ="target_chembl_id")
target_bioacti_final$pchembl_value <- as.numeric(target_bioacti_final$pchembl_value)
target_bioacti_final %>%
dplyr::mutate(document_chembl_id = paste0("<a target='_blank' href='https://www.ebi.ac.uk/chembl/beta/document_report_card/",document_chembl_id,"'>",document_chembl_id,"</a>")) %>%
datatable(escape = F, rownames = F, options = list(autoWidth = T,scrollX = "50px"),caption = "Target bioactivities result")
# Analysis of the target bioactivities in the context of protein-protein interactions (PPIs)
string_api_url <- "https://string-db.org/api"
method <- "interaction_partners"
my_genes <- paste(unique(target_bioacti_final$gene_symbol), collapse = "%0d")
species <- "9606"
limit <- 20
my_app <- "www.awesome_app.org"
string <- fromJSON(paste0(string_api_url, "/json", "/interaction_partners?","identifiers=", my_genes,"&species=",species,"&limit=",limit,"&caller_identity=",my_app), flatten = T)
string <- string[string$escore >0,]
my_ppis <- string[,c(1,12,11)]
my_ppis$escore <- my_ppis$escore * 10
colnames(my_ppis) <- c("col_A", "col_B", "score")
my_ppis$type <- "PPI"
ppi_chembl <- rbind(graph_chembl_final, my_ppis)
symbol_ids <- unique(ppi_chembl$col_B)
entrez_ids = bitr(symbol_ids, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
enrich <- enrichPathway(gene=entrez_ids$ENTREZID,pvalueCutoff=0.05, readable=T)
enrich_table <- as.data.frame(enrich)
enrich_table_exp <- enrich_table %>% transform(geneID = strsplit(geneID, "/")) %>% unnest(geneID)
enrich_table_exp <- enrich_table_exp[!duplicated(enrich_table_exp$geneID),]
enrich_table_exp$score <- -log(enrich_table_exp$p.adjust)
enrich_table_exp <- enrich_table_exp[,c(2,9,10)]
colnames(enrich_table_exp) <- c("col_A","col_B","score")
enrich_table_exp$type <- "Pathway"
indi_comp <- df_indi[,c(5,1)]
indi_comp <- indi_comp[!duplicated(indi_comp),]
colnames(indi_comp) <- c("col_A","col_B")
indi_comp$score <- 10
indi_comp$type <- "Indication"
indi_tar <- df_indi[,c(5,3)]
indi_tar <- indi_tar[!duplicated(indi_tar),]
colnames(indi_tar) <- c("col_A","col_B")
indi_tar$score <- 10
indi_tar$type <- "Indication"
meta_inte <- rbind(ppi_chembl, enrich_table_exp, indi_comp, indi_tar)
meta_inte$score <- round(meta_inte$score, 2)
dotplot(enrich, showCategory=20)
genes_A <- my_ppis %>%
dplyr::distinct(col_A) %>%
dplyr::rename(label = col_A)
genes_B <- my_ppis %>%
dplyr::distinct(col_B) %>%
dplyr::rename(label = col_B)
genes_AB <- full_join(genes_A,genes_B, by = "label")
genes_AB$type <- "Gene"
comps <- graph_chembl_final[,c(1,4)]
comps <- comps[!duplicated(comps),]
names(comps)[1] <- "label"
paths_AB <- enrich_table_exp[,c(1,4)]
paths_AB <- paths_AB[!duplicated(paths_AB),]
names(paths_AB)[1] <- "label"
indi_net <- unique(indi_comp[,c(1,4)])
names(indi_net)[1] <- "label"
labels_type <- rbind(indi_net,comps, genes_AB, paths_AB)
nodes <- labels_type %>%
dplyr::mutate(id = 1:nrow(labels_type)) %>%
dplyr::select(id, everything())
my_format <- data.frame(type = c("Indication","ChEMBL","Pathway","Gene"), shape = c("box","diamond","dot","square"))
nodes <- merge(nodes, my_format, by = "type")
nodes <- nodes[,c(2,3,1,4)]
names(nodes)[3] <- "group"
nodes <- nodes[order(nodes$group),]
edges <- meta_inte %>%
dplyr::left_join(nodes[,1:2], by = c("col_A" = "label")) %>%
dplyr::rename(from = id)
edges <- edges %>%
dplyr::left_join(nodes[,1:2], by = c("col_B" = "label")) %>%
dplyr::rename(to = id)
edges <- dplyr::select(edges, from, to, score)
nodes_d3 <- dplyr::mutate(nodes, id = id - 1)
nodes_d3 <- nodes_d3[order(nodes_d3$id),]
edges_d3 <- dplyr::mutate(edges, from = from - 1, to = to - 1)
sankeyNetwork(Links = edges_d3, Nodes = nodes_d3, Source = "from", Target = "to", Value = "score", NodeID = "label", NodeGroup = "group", units = "", fontSize = 15, nodeWidth = 35, height = 800, width = 1200, sinksRight = F)
visNetwork(nodes, edges, height = "800px",width = "100%") %>%
visGroups(groupname = "ChEMBL", color = "red") %>%
visGroups(groupname = "Gene", color = "lightblue") %>%
visGroups(groupname = "Indication", color = "yellow") %>%
visGroups(groupname = "Pathway", color = "orange") %>%
visOptions(highlightNearest = list(enabled =TRUE, degree = 2)) %>%
visNodes(shadow = list(enabled = TRUE, size = 10))